On andromeda
Download protein file from genome
cd /data/putnamlab/zdellaert/Pdam-TagSeq/references
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
gunzip Pocillopora_acuta_HIv2.genes.pep.faa.gz
cd ..
mkdir blast
cd blast
On personal computer
scp /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.fasta zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/
Biomineralization_Toolkit_FScucchia.fasta
On andromeda
nano Biomineralization_blast.sh
#!/bin/bash
#SBATCH --job-name="Pacuta_TRP_blast"
#SBATCH -t 240:00:00
#SBATCH --export=NONE
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH --mail-user=zdellaert@uri.edu #your email to send notifications
#SBATCH --mem=100GB
#SBATCH --error="blast_out_error"
#SBATCH --output="blast_out"
#SBATCH --account=putnamlab
#SBATCH -D /data/putnamlab/zdellaert/Pdam-TagSeq/blast/
#SBATCH --nodes=1 --ntasks-per-node=20
module load BLAST+/2.9.0-iimpi-2019b
makeblastdb -in ../references/Pocillopora_acuta_HIv2.genes.pep.faa -out Pacuta_prot -dbtype prot
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results.txt -outfmt 0
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results_tab.txt -outfmt 6 -max_target_seqs 1
sbatch Biomineralization_blast.sh
Errors:
Warning: [blastp] Examining 5 or more matches is recommended FASTA-Reader: Ignoring invalid residues at position(s): On line 2741: 378, 383, 386-390, 401, 417, 420-422, 431, 437-439, 443, 459-461 Warning: [blastp] Query_168 Gene: g13552, N.. : One or more O characters replaced by X for alignment score calculations at positions 382, 390, 392, 422
On personal computer:
scp zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results.txt /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/output
scp zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results_tab.txt /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/output
Now, will take the best Pacuta alignment for each Biomineralization Gene and match to the name of that gene and make the dataframe into a format to match with differentially expressed/frontloaded genes or modules.
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.37 R6_2.5.1 fastmap_1.2.0 xfun_0.47
## [5] cachem_1.1.0 knitr_1.48 htmltools_0.5.8.1 rmarkdown_2.28
## [9] lifecycle_1.0.4 cli_3.6.3 sass_0.4.9 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.16.0 tools_4.3.2 evaluate_1.0.0
## [17] bslib_0.8.0 yaml_2.3.10 rlang_1.1.4 jsonlite_1.8.9
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
Biomin_genes <- read_excel("../../../data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.xlsx")
Biomin_genes <- Biomin_genes %>% select(-`blasted protein in Stylophora`)
Biomin_blast_results <- read.delim("../../../output/Biomineralization_blast_results_tab.txt", header=FALSE)
Biomin_blast_results_orig <- Biomin_blast_results %>% select(V1, V2) %>% distinct()
Biomin_blast_results_filt <- Biomin_blast_results %>% filter(V11 < 0.01) %>% select(V1, V2) %>% distinct()
Biomin_blast_results <- Biomin_blast_results_filt
# Merge data frames based on accessionnumber/geneID
merged_data <- Biomin_genes %>%
inner_join(Biomin_blast_results, by = c("accessionnumber/geneID" = "V1")) %>% rename("Pocillopora_acuta_best_hit" = "V2")
write.csv(merged_data, "../../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv", row.names = F)
How many of our 9011 genes are represented in the Biomineralization genes?
DEGs <- read.csv(file="../../../output/glmmseq/signif_genes_normcts.csv", sep=',', header=TRUE) %>% dplyr::select(!c('X'))
#NOTE! This is not a file only with differentially expressed genes, this contains all of the genes in our dataset but also contains p-value information and fold change information to help determine which genes are signficant DEGs based on our model in glmmSeq
rownames(DEGs) <- DEGs$Gene
dim(DEGs)
## [1] 9011 75
Biomin_genes <- DEGs %>%
inner_join(merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))
Biomin_genes$definition
## [1] "Mucin4-like protein"
## [2] "Sushi domain-containing"
## [3] "Mucin-4 [Stylophora pistillata]"
## [4] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"
## [5] "uncharacterized protein LOC111337489 [Stylophora pistillata]"
## [6] "Viral inclusion protein"
## [7] "Annotated: Actin"
## [8] "plasma membrane calcium ATPase [Stylophora pistillata]"
## [9] "Hephaestin-like protein"
## [10] "hephaestin-like protein [Stylophora pistillata]"
## [11] "Annotated: Vitellogenin"
## [12] "clone g15888 vitellogenin-like protein gene"
## [13] "clone g1441 vitellogenin-like protein gene"
## [14] "vitellogenin-like [Stylophora pistillata]"
## [15] "Zona pellucida domain-containing protein"
## [16] "Annotated: Zona Pellucida (ZP domain-containing)"
## [17] "Acropora millepora clone B26 hypothetical protein p251_4"
## [18] "Zona pellucida"
## [19] "ZP domain-containing protein-like [Stylophora pistillata]"
## [20] "solute carrier family 4 member gamma [Stylophora pistillata]"
## [21] "Sacsin [Stylophora pistillata]"
## [22] "Complement C3 [Stylophora pistillata]"
## [23] "uncharacterized protein LOC111323869 [Stylophora pistillata]"
## [24] "uncharacterized protein LOC111345150 [Stylophora pistillata]"
## [25] "Major yolk protein"
## [26] "major yolk protein-like isoform X2 [Stylophora pistillata]"
## [27] "SAARP3"
## [28] "Acidic SOMP (Full-Length p27)"
## [29] "Acidic skeletal organic matrix protein (Acidic SOMP)"
## [30] "CARP1 [Stylophora pistillata]"
## [31] "Annotated: CARP1"
## [32] "Uncharacterized skeletal organic matrix protein-3 (USOMP-3)"
## [33] "Collagen alpha-1 chain"
## [34] "Annotated: Tolloid-Like"
## [35] "CUB domain-containing protein-like isoform X2 [Stylophora pistillata]"
## [36] "Protocadherin-like"
## [37] "chymotrypsin-like elastase family member 1 [Stylophora pistillata]"
## [38] "microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]"
## [39] "microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]"
## [40] "sodium bicarbonate cotransporter 3-like isoform X2"
## [41] "Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]"
## [42] "carbonic anhydrase [Stylophora pistillata]"
## [43] "carbonic anhydrase 2"
## [44] "Annotated: Carbonic Anhydrase (STPCA2-1)"
## [45] "Annotated: CarbonicAnhyrase"
## [46] "Annotated: N/A, named it CARP6-partial"
## [47] "Annotated: USOMPS13"
## [48] "Stylophora pistillata clone g11702 hypothetical protein gene"
## [49] "Annotated: Kielin-Like"
## [50] "Kielin/chordin like"
## [51] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]"
## [52] "Flagellar associated protein"
## [53] "protein lingerer-like [Stylophora pistillata]"
## [54] "CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]"
## [55] "Protein FAM208A [Stylophora pistillata]"
## [56] "spore wall protein 2-like isoform X3 [Stylophora pistillata]"
## [57] "L-type calcium channel alpha-1 subunit"
## [58] "Annotated: Fibronectin"
## [59] "Annotated: Fibronectin (Fibronectin-2)"
## [60] "Annotated: carbonic anhydrase (STPCA2-2)"
## [61] "Stylophora pistillata clone g19762 hypothetical protein gene"
## [62] "CARP3 [Stylophora pistillata]"
## [63] "galaxin2"
## [64] "galaxin"
## [65] "Galaxin 2"
## [66] "galaxin-like isoform X2 [Stylophora pistillata]"
## [67] "Annotated: Protoacadherin (PC4)"
## [68] "Annotated: Protocadherin (PC2)"
## [69] "Annotated: Protocadherin (PC3)"
## [70] "Annotated: Protocadherin (PC3)"
## [71] "Annotated: Cadherin"
## [72] "Annotated: Protocadherin (PC1)"
## [73] "Annotated: Protoacadherin (PC4)"
## [74] "Protocadherin fat-like"
## [75] "MAM and LDLr domain-containing protein"
## [76] "MAM and LDLr domain-containing protein"
## [77] "Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)"
## [78] "MAM and LDL-receptor domain- containing protein 2"
## [79] "MAM and LDL-receptor domain- containing protein 1"
## [80] "MAM domain anchor protein"
## [81] "MAM/LDL receptor domain containing protein"
## [82] "Zonadhesion-like precursor"
## [83] "MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]"
## [84] "band 3 anion transport protein-like"
## [85] "LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]"
## [86] "MAGUK p55 subfamily member 7-like [Stylophora pistillata]"
## [87] "uncharacterized protein LOC111344812 [Stylophora pistillata]"
## [88] "SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]"
## [89] "Late embryogenesis protein"
## [90] "EGF and laminin G domain-containing protein"
## [91] "EGF and laminin G domain-containing protein"
## [92] "Laminin G domain-containing protein"
## [93] "EGF and laminin G domain-containing protein"
## [94] "Annotated: EGF and LamininG-Like (EGF LamG2)"
## [95] "Annotated: EGF and LamininG-Like (EGF LamG1)"
## [96] "EGF and laminin G domain-containing protein"
## [97] "Contactin-associated protein"
## [98] "Neurexin"
## [99] "EGF and laminin G domain-containing protein-like [Stylophora pistillata]"
## [100] "Annotated: Protocadherin (PC5)"
## [101] "Protocadherin"
## [102] "endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]"
## [103] "PHD finger protein 21A-like [Stylophora pistillata]"
## [104] "low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]"
## [105] "Acropora yongei Na+/Ca2+ exchanger"
## [106] "TSP-1 and VWA domain-containing"
## [107] "Annotated: Thrombospondin-like protein (Thrombospondin)"
## [108] "Annotated: Coadhesin"
## [109] "clone g9951 alpha collagen-like protein gene"
## [110] "Thrombospondin"
## [111] "Hemicentin"
## [112] "coadhesin-like isoform X3 [Stylophora pistillata]"
## [113] "Uncharacterized skeletal organic matrix protein-6 (USOMP6)"
## [114] "Integrin - alpha"
## [115] "hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]"
## [116] "von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]"
## [117] "collagenase 3-like [Stylophora pistillata]"
## [118] "digestive cysteine proteinase 1-like [Stylophora pistillata]"
## [119] "Cystein-rich"
## [120] "Uncharacterized skeletal organic matrix protein-2 (USOMP-2)"
## [121] "polycystic kidney disease 1-related (PKD1-related) protein"
## [122] "polycystic kidney disease 1-related (PKD1-related) protein"
## [123] "Adi-SAARP2"
## [124] "Skeletal acidic Asp-rich Protein 2 (SAARP2)"
## [125] "CARP9"
## [126] "skeletal aspartic acid-rich protein 2-like (CARP5)"
length(Biomin_genes$definition)
## [1] 126
Biomin_genes_names <- unique(Biomin_genes$Gene)
length(Biomin_genes_names)
## [1] 64
Biomin_genes %>% select(Gene, `accessionnumber/geneID`, definition, Ref)
## Gene
## 1 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 2 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g25351.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g7085.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g22851.t1
## 7 Pocillopora_acuta_HIv2___RNAseq.g14505.t1
## 8 Pocillopora_acuta_HIv2___RNAseq.g27976.t1
## 9 Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 10 Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 11 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 12 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 13 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 14 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 15 Pocillopora_acuta_HIv2___TS.g2710.t1
## 16 Pocillopora_acuta_HIv2___TS.g2710.t1
## 17 Pocillopora_acuta_HIv2___TS.g2710.t1
## 18 Pocillopora_acuta_HIv2___TS.g2710.t1
## 19 Pocillopora_acuta_HIv2___TS.g2710.t1
## 20 Pocillopora_acuta_HIv2___RNAseq.g15280.t1
## 21 Pocillopora_acuta_HIv2___RNAseq.g25214.t1
## 22 Pocillopora_acuta_HIv2___RNAseq.g8821.t1
## 23 Pocillopora_acuta_HIv2___RNAseq.g21232.t1
## 24 Pocillopora_acuta_HIv2___RNAseq.g20587.t2
## 25 Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 26 Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 27 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 28 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 29 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 30 Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 31 Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 32 Pocillopora_acuta_HIv2___TS.g23724.t1a
## 33 Pocillopora_acuta_HIv2___TS.g1359.t1
## 34 Pocillopora_acuta_HIv2___RNAseq.g26037.t1
## 35 Pocillopora_acuta_HIv2___RNAseq.g26035.t1
## 36 Pocillopora_acuta_HIv2___RNAseq.g3235.t1
## 37 Pocillopora_acuta_HIv2___RNAseq.g19288.t1
## 38 Pocillopora_acuta_HIv2___TS.g11659.t1
## 39 Pocillopora_acuta_HIv2___TS.g11659.t1
## 40 Pocillopora_acuta_HIv2___RNAseq.g7402.t1
## 41 Pocillopora_acuta_HIv2___RNAseq.g14663.t1a
## 42 Pocillopora_acuta_HIv2___TS.g12304.t1
## 43 Pocillopora_acuta_HIv2___TS.g12304.t1
## 44 Pocillopora_acuta_HIv2___TS.g12304.t1
## 45 Pocillopora_acuta_HIv2___TS.g12304.t1
## 46 Pocillopora_acuta_HIv2___TS.g5112.t1
## 47 Pocillopora_acuta_HIv2___TS.g26810.t1
## 48 Pocillopora_acuta_HIv2___TS.g26810.t1
## 49 Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 50 Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 51 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 52 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 53 Pocillopora_acuta_HIv2___RNAseq.g7908.t1
## 54 Pocillopora_acuta_HIv2___RNAseq.g21338.t1
## 55 Pocillopora_acuta_HIv2___RNAseq.g26846.t1a
## 56 Pocillopora_acuta_HIv2___RNAseq.g5807.t1
## 57 Pocillopora_acuta_HIv2___RNAseq.g21501.t1
## 58 Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 59 Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 60 Pocillopora_acuta_HIv2___RNAseq.g13824.t1
## 61 Pocillopora_acuta_HIv2___TS.g425.t1
## 62 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 63 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 64 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 65 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 66 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 67 Pocillopora_acuta_HIv2___TS.g6583.t1
## 68 Pocillopora_acuta_HIv2___TS.g6583.t1
## 69 Pocillopora_acuta_HIv2___TS.g6583.t1
## 70 Pocillopora_acuta_HIv2___TS.g6583.t1
## 71 Pocillopora_acuta_HIv2___TS.g6583.t1
## 72 Pocillopora_acuta_HIv2___TS.g6583.t1
## 73 Pocillopora_acuta_HIv2___TS.g6583.t1
## 74 Pocillopora_acuta_HIv2___TS.g6583.t1
## 75 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 76 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 77 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 78 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 79 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 80 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 81 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 82 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 83 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 84 Pocillopora_acuta_HIv2___TS.g27873.t1
## 85 Pocillopora_acuta_HIv2___RNAseq.g7668.t1
## 86 Pocillopora_acuta_HIv2___RNAseq.g15517.t1
## 87 Pocillopora_acuta_HIv2___RNAseq.g24861.t1b
## 88 Pocillopora_acuta_HIv2___RNAseq.g27376.t1
## 89 Pocillopora_acuta_HIv2___RNAseq.g16715.t1
## 90 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 91 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 92 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 93 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 94 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 95 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 96 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 97 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 98 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 99 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 100 Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 101 Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 102 Pocillopora_acuta_HIv2___RNAseq.g19211.t1
## 103 Pocillopora_acuta_HIv2___RNAseq.g1634.t1
## 104 Pocillopora_acuta_HIv2___RNAseq.g4085.t1
## 105 Pocillopora_acuta_HIv2___RNAseq.g24639.t1
## 106 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 107 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 108 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 109 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 110 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 111 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 112 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 113 Pocillopora_acuta_HIv2___TS.g22622.t1
## 114 Pocillopora_acuta_HIv2___TS.g15792.t1
## 115 Pocillopora_acuta_HIv2___TS.g15792.t1
## 116 Pocillopora_acuta_HIv2___RNAseq.g28226.t2
## 117 Pocillopora_acuta_HIv2___TS.g5338.t1
## 118 Pocillopora_acuta_HIv2___RNAseq.g18103.t1
## 119 Pocillopora_acuta_HIv2___TS.g1545.t1b
## 120 Pocillopora_acuta_HIv2___TS.g1545.t1b
## 121 Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 122 Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 123 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 124 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 125 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 126 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## accessionnumber/geneID
## 1 aug_v2a.09809.t1
## 2 P13_g6918
## 3 PFX18785.1
## 4 XP_022794351.1
## 5 XP_022799541.1
## 6 P4_g9861
## 7 Gene:g9094
## 8 AAR13013.1
## 9 aug_v2a.24015.t1
## 10 XP_022788227.1
## 11 Gene:g15294.t1
## 12 P24_g15888
## 13 P26_g1441
## 14 XP_022779720.1
## 15 aug_v2a.07627.t1
## 16 Gene:g907
## 17 JN631095.1
## 18 P21_g18277
## 19 XP_022806326.1
## 20 AJQ31790.1
## 21 PFX13778.1
## 22 PFX26597.1
## 23 XP_022783044.1
## 24 XP_022808163.1
## 25 P8_g9654
## 26 XP_022786918.1
## 27 aug_v2a.06327.t1
## 28 Gene:g13552
## 29 JR972076.1
## 30 AGE35225.2
## 31 Gene:g1484
## 32 JR997000.1
## 33 JR991083.1
## 34 Gene:g5735.t1
## 35 XP_022799089.1
## 36 aug_v2a.19518.t1
## 37 XP_022788730.1
## 38 XP_022809269.1
## 39 XP_022809270.1
## 40 XP_022801463.1
## 41 PFX27832.1
## 42 ACE95141.1
## 43 EU532164.1
## 44 Gene:g29033.t1
## 45 Gene:g29034.t1
## 46 Gene:g8396
## 47 Gene:g30385.t1
## 48 P16_g11702
## 49 Gene:g39770
## 50 P32_g5540
## 51 XP_022804785.1
## 52 P33_g8985
## 53 XP_022806664.1
## 54 XP_022780694.1
## 55 PFX15740.1
## 56 XP_022803872.1
## 57 AAD11470.1
## 58 Gene:g22569
## 59 Gene:g37058
## 60 Gene:g27814
## 61 P22_g19762
## 62 AGE35226.1
## 63 aug_v2a.15065.t1
## 64 aug_v2a.18631.t1
## 65 JR976690.1
## 66 XP_022794122.1
## 67 AGG36361.1
## 68 Gene:10186
## 69 Gene:g10187
## 70 Gene:g10188
## 71 Gene:g2115
## 72 Gene:g2116
## 73 Gene:g30
## 74 P9_g10811;P1_g11108;P10_g11107
## 75 aug_v2a.09968.t1
## 76 aug_v2a.09969.t1
## 77 Gene:g15955
## 78 JR994474.1
## 79 JT011118.1
## 80 P20_g6066
## 81 P34_g1714
## 82 P36_g13890
## 83 XP_022794736.1
## 84 XP_022788270.1
## 85 XP_022780303.1
## 86 XP_022789932.1
## 87 XP_022807807.1
## 88 XP_022806928.1
## 89 P28_g11651
## 90 aug_v2a.06122.t1
## 91 aug_v2a.06123.t1
## 92 aug_v2a.15580.t1
## 93 aug_v2a.24512.t1
## 94 Gene:g34749
## 95 Gene:g7086
## 96 JR980881.1
## 97 P19_g20041
## 98 P31_g20420
## 99 XP_022804012.1
## 100 Gene:g24177
## 101 P23_g1057
## 102 XP_022789591.1
## 103 XP_022790441.1
## 104 XP_022798902.1
## 105 MG182344.1
## 106 aug_v2a.05945.t1
## 107 Gene:g2829
## 108 Gene:g2829.t1
## 109 P14_g9951
## 110 P3_g12510
## 111 P5_g11674
## 112 XP_022783415.1
## 113 JR971508.1
## 114 P27_g18472
## 115 PFX30903.1
## 116 XP_022810585.1
## 117 XP_022783952.1
## 118 XP_022803524.1
## 119 aug_v2a.15064.t1
## 120 JR982706.1
## 121 aug_v2a.02830
## 122 aug_v2a.02830.t1
## 123 aug_v2a.01440.t1(aug_v2a.01441.t1)
## 124 JR991407.1
## 125 P15_g1532
## 126 XP_022780690.1
## definition
## 1 Mucin4-like protein
## 2 Sushi domain-containing
## 3 Mucin-4 [Stylophora pistillata]
## 4 mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 5 uncharacterized protein LOC111337489 [Stylophora pistillata]
## 6 Viral inclusion protein
## 7 Annotated: Actin
## 8 plasma membrane calcium ATPase [Stylophora pistillata]
## 9 Hephaestin-like protein
## 10 hephaestin-like protein [Stylophora pistillata]
## 11 Annotated: Vitellogenin
## 12 clone g15888 vitellogenin-like protein gene
## 13 clone g1441 vitellogenin-like protein gene
## 14 vitellogenin-like [Stylophora pistillata]
## 15 Zona pellucida domain-containing protein
## 16 Annotated: Zona Pellucida (ZP domain-containing)
## 17 Acropora millepora clone B26 hypothetical protein p251_4
## 18 Zona pellucida
## 19 ZP domain-containing protein-like [Stylophora pistillata]
## 20 solute carrier family 4 member gamma [Stylophora pistillata]
## 21 Sacsin [Stylophora pistillata]
## 22 Complement C3 [Stylophora pistillata]
## 23 uncharacterized protein LOC111323869 [Stylophora pistillata]
## 24 uncharacterized protein LOC111345150 [Stylophora pistillata]
## 25 Major yolk protein
## 26 major yolk protein-like isoform X2 [Stylophora pistillata]
## 27 SAARP3
## 28 Acidic SOMP (Full-Length p27)
## 29 Acidic skeletal organic matrix protein (Acidic SOMP)
## 30 CARP1 [Stylophora pistillata]
## 31 Annotated: CARP1
## 32 Uncharacterized skeletal organic matrix protein-3 (USOMP-3)
## 33 Collagen alpha-1 chain
## 34 Annotated: Tolloid-Like
## 35 CUB domain-containing protein-like isoform X2 [Stylophora pistillata]
## 36 Protocadherin-like
## 37 chymotrypsin-like elastase family member 1 [Stylophora pistillata]
## 38 microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]
## 39 microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]
## 40 sodium bicarbonate cotransporter 3-like isoform X2
## 41 Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]
## 42 carbonic anhydrase [Stylophora pistillata]
## 43 carbonic anhydrase 2
## 44 Annotated: Carbonic Anhydrase (STPCA2-1)
## 45 Annotated: CarbonicAnhyrase
## 46 Annotated: N/A, named it CARP6-partial
## 47 Annotated: USOMPS13
## 48 Stylophora pistillata clone g11702 hypothetical protein gene
## 49 Annotated: Kielin-Like
## 50 Kielin/chordin like
## 51 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 52 Flagellar associated protein
## 53 protein lingerer-like [Stylophora pistillata]
## 54 CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]
## 55 Protein FAM208A [Stylophora pistillata]
## 56 spore wall protein 2-like isoform X3 [Stylophora pistillata]
## 57 L-type calcium channel alpha-1 subunit
## 58 Annotated: Fibronectin
## 59 Annotated: Fibronectin (Fibronectin-2)
## 60 Annotated: carbonic anhydrase (STPCA2-2)
## 61 Stylophora pistillata clone g19762 hypothetical protein gene
## 62 CARP3 [Stylophora pistillata]
## 63 galaxin2
## 64 galaxin
## 65 Galaxin 2
## 66 galaxin-like isoform X2 [Stylophora pistillata]
## 67 Annotated: Protoacadherin (PC4)
## 68 Annotated: Protocadherin (PC2)
## 69 Annotated: Protocadherin (PC3)
## 70 Annotated: Protocadherin (PC3)
## 71 Annotated: Cadherin
## 72 Annotated: Protocadherin (PC1)
## 73 Annotated: Protoacadherin (PC4)
## 74 Protocadherin fat-like
## 75 MAM and LDLr domain-containing protein
## 76 MAM and LDLr domain-containing protein
## 77 Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)
## 78 MAM and LDL-receptor domain- containing protein 2
## 79 MAM and LDL-receptor domain- containing protein 1
## 80 MAM domain anchor protein
## 81 MAM/LDL receptor domain containing protein
## 82 Zonadhesion-like precursor
## 83 MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]
## 84 band 3 anion transport protein-like
## 85 LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]
## 86 MAGUK p55 subfamily member 7-like [Stylophora pistillata]
## 87 uncharacterized protein LOC111344812 [Stylophora pistillata]
## 88 SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]
## 89 Late embryogenesis protein
## 90 EGF and laminin G domain-containing protein
## 91 EGF and laminin G domain-containing protein
## 92 Laminin G domain-containing protein
## 93 EGF and laminin G domain-containing protein
## 94 Annotated: EGF and LamininG-Like (EGF LamG2)
## 95 Annotated: EGF and LamininG-Like (EGF LamG1)
## 96 EGF and laminin G domain-containing protein
## 97 Contactin-associated protein
## 98 Neurexin
## 99 EGF and laminin G domain-containing protein-like [Stylophora pistillata]
## 100 Annotated: Protocadherin (PC5)
## 101 Protocadherin
## 102 endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]
## 103 PHD finger protein 21A-like [Stylophora pistillata]
## 104 low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]
## 105 Acropora yongei Na+/Ca2+ exchanger
## 106 TSP-1 and VWA domain-containing
## 107 Annotated: Thrombospondin-like protein (Thrombospondin)
## 108 Annotated: Coadhesin
## 109 clone g9951 alpha collagen-like protein gene
## 110 Thrombospondin
## 111 Hemicentin
## 112 coadhesin-like isoform X3 [Stylophora pistillata]
## 113 Uncharacterized skeletal organic matrix protein-6 (USOMP6)
## 114 Integrin - alpha
## 115 hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]
## 116 von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]
## 117 collagenase 3-like [Stylophora pistillata]
## 118 digestive cysteine proteinase 1-like [Stylophora pistillata]
## 119 Cystein-rich
## 120 Uncharacterized skeletal organic matrix protein-2 (USOMP-2)
## 121 polycystic kidney disease 1-related (PKD1-related) protein
## 122 polycystic kidney disease 1-related (PKD1-related) protein
## 123 Adi-SAARP2
## 124 Skeletal acidic Asp-rich Protein 2 (SAARP2)
## 125 CARP9
## 126 skeletal aspartic acid-rich protein 2-like (CARP5)
## Ref
## 1 Takeuchi et al., 2016
## 2 Drake et al., 2013
## 3 Peled et al., 2020
## 4 Peled et al., 2020
## 5 Peled et al., 2020
## 6 Drake et al., 2013
## 7 Mummadisetti et al., 2021
## 8 Zoccola et al., 2004
## 9 Takeuchi et al., 2016
## 10 Peled et al., 2020
## 11 Mummadisetti et al., 2021
## 12 Drake et al., 2013
## 13 Drake et al., 2013
## 14 Peled et al., 2020
## 15 Takeuchi et al., 2016
## 16 Mummadisetti et al., 2021
## 17 Hayward et al., 2011
## 18 Drake et al., 2013
## 19 Peled et al., 2020
## 20 Zoccola et al., 2015
## 21 Peled et al., 2020
## 22 Peled et al., 2020
## 23 Peled et al., 2020
## 24 Peled et al., 2020
## 25 Drake et al., 2013
## 26 Peled et al., 2020
## 27 Takeuchi et al., 2016
## 28 Mummadisetti et al., 2021
## 29 Ramos-Silva et al., 2013
## 30 Mass et al., 2013
## 31 Mummadisetti et al., 2021
## 32 Ramos-Silva et al., 2013
## 33 Ramos-Silva et al., 2013
## 34 Mummadisetti et al., 2021
## 35 Peled et al., 2020
## 36 Takeuchi et al., 2016
## 37 Peled et al., 2020
## 38 Peled et al., 2020
## 39 Peled et al., 2020
## 40 Zoccola et al., 2015
## 41 Peled et al., 2020
## 42 Moya et al., 2008
## 43 Bertucci et al., 2011
## 44 Mummadisetti et al., 2021
## 45 Mummadisetti et al., 2021
## 46 Mummadisetti et al., 2021
## 47 Mummadisetti et al., 2021
## 48 Drake et al., 2013
## 49 Mummadisetti et al., 2021
## 50 Drake et al., 2013
## 51 Peled et al., 2020
## 52 Drake et al., 2013
## 53 Peled et al., 2020
## 54 Peled et al., 2020
## 55 Peled et al., 2020
## 56 Peled et al., 2020
## 57 Zoccola et al., 1999
## 58 Mummadisetti et al., 2021
## 59 Mummadisetti et al., 2021
## 60 Mummadisetti et al., 2021
## 61 Drake et al., 2013
## 62 Mass et al., 2013
## 63 Takeuchi et al., 2016
## 64 Takeuchi et al., 2016
## 65 Ramos-Silva et al., 2013
## 66 Peled et al., 2020
## 67 Drake et al., 2013
## 68 Mummadisetti et al., 2021
## 69 Mummadisetti et al., 2021
## 70 Mummadisetti et al., 2021
## 71 Mummadisetti et al., 2021
## 72 Mummadisetti et al., 2021
## 73 Mummadisetti et al., 2021
## 74 Drake et al., 2013
## 75 Takeuchi et al., 2016
## 76 Takeuchi et al., 2016
## 77 Mummadisetti et al., 2021
## 78 Ramos-Silva et al., 2013
## 79 Ramos-Silva et al., 2013
## 80 Drake et al., 2013
## 81 Drake et al., 2013
## 82 Drake et al., 2013
## 83 Peled et al., 2020
## 84 Zoccola et al., 2015
## 85 Peled et al., 2020
## 86 Peled et al., 2020
## 87 Peled et al., 2020
## 88 Peled et al., 2020
## 89 Drake et al., 2013
## 90 Takeuchi et al., 2016
## 91 Takeuchi et al., 2016
## 92 Takeuchi et al., 2016
## 93 Takeuchi et al., 2016
## 94 Mummadisetti et al., 2021
## 95 Mummadisetti et al., 2021
## 96 Ramos-Silva et al., 2013
## 97 Drake et al., 2013
## 98 Drake et al., 2013
## 99 Peled et al., 2020
## 100 Mummadisetti et al., 2021
## 101 Drake et al., 2013
## 102 Peled et al., 2020
## 103 Peled et al., 2020
## 104 Peled et al., 2020
## 105 Barron et al., 2018
## 106 Takeuchi et al., 2016
## 107 Mummadisetti et al., 2021
## 108 Mummadisetti et al., 2021
## 109 Drake et al., 2013
## 110 Drake et al., 2013
## 111 Drake et al., 2013
## 112 Peled et al., 2020
## 113 Ramos-Silva et al., 2013
## 114 Drake et al., 2013
## 115 Peled et al., 2020
## 116 Peled et al., 2020
## 117 Peled et al., 2020
## 118 Peled et al., 2020
## 119 Takeuchi et al., 2016
## 120 Ramos-Silva et al., 2013
## 121 Takeuchi et al., 2016
## 122 Takeuchi et al., 2016
## 123 Takeuchi et al., 2016
## 124 Ramos-Silva et al., 2013
## 125 Drake et al., 2013
## 126 Peled et al., 2020
#Biomin_genes %>% select(Gene, `accessionnumber/geneID`, definition, Ref, Origin, Treatment, Treatment.Origin) %>% View()
write.csv(Biomin_genes, "../../../output/Biomin_blast_Pocillopora_acuta_best_hit_glmmSeq.csv", row.names = F)
126/172 of the Biomineralization Genes are represented in our dataset of 9011 genes, matching to 64/9011 genes
Differentially expressed genes: are any of these Biomineralization genes?
Origin_DEGs <- DEGs %>% dplyr::filter(Origin < 0.05)
nrow(Origin_DEGs)
## [1] 840
Treatment_DEGs <- DEGs %>% dplyr::filter(Treatment < 0.05)
nrow(Treatment_DEGs)
## [1] 18
Interaction_DEGs <- DEGs %>% dplyr::filter(Treatment.Origin < 0.05)
nrow(Interaction_DEGs)
## [1] 30
Setting up for plotting genes, loading in results from glmmseq
library(glmmSeq)
results <- readRDS(file = "glmmSeq.rds") #load in RDS from previous step / previous iteration
results <- glmmQvals(results)
##
## Treatment
## ---------
## Not Significant Significant
## 8993 18
##
## Origin
## ------
## Not Significant Significant
## 8171 840
##
## Treatment:Origin
## ----------------
## Not Significant Significant
## 8981 30
source(file = "../Factor_ggmodelPlot.R")
plotColours <- c("skyblue","mediumseagreen")
modColours <- c("dodgerblue3","seagreen4")
Biomin_Origin_DEGs <- Origin_DEGs %>%
inner_join(merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))
Biomin_Origin_DEGs$definition
## [1] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"
## [2] "Annotated: Vitellogenin"
## [3] "clone g15888 vitellogenin-like protein gene"
## [4] "clone g1441 vitellogenin-like protein gene"
## [5] "vitellogenin-like [Stylophora pistillata]"
## [6] "uncharacterized protein LOC111323869 [Stylophora pistillata]"
## [7] "uncharacterized protein LOC111345150 [Stylophora pistillata]"
## [8] "carbonic anhydrase [Stylophora pistillata]"
## [9] "carbonic anhydrase 2"
## [10] "Annotated: Carbonic Anhydrase (STPCA2-1)"
## [11] "Annotated: CarbonicAnhyrase"
## [12] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]"
## [13] "protein lingerer-like [Stylophora pistillata]"
## [14] "Annotated: carbonic anhydrase (STPCA2-2)"
## [15] "Late embryogenesis protein"
length(Biomin_Origin_DEGs$definition)
## [1] 15
Biomin_Origin_DEG_names <- unique(Biomin_Origin_DEGs$Gene)
length(Biomin_Origin_DEG_names)
## [1] 9
Biomin_Origin_DEGs %>% select(Gene, `accessionnumber/geneID`, definition, Ref)
## Gene accessionnumber/geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g25351.t1 XP_022794351.1
## 2 Pocillopora_acuta_HIv2___TS.g13222.t1b Gene:g15294.t1
## 3 Pocillopora_acuta_HIv2___TS.g13222.t1b P24_g15888
## 4 Pocillopora_acuta_HIv2___TS.g13222.t1b P26_g1441
## 5 Pocillopora_acuta_HIv2___TS.g13222.t1b XP_022779720.1
## 6 Pocillopora_acuta_HIv2___RNAseq.g21232.t1 XP_022783044.1
## 7 Pocillopora_acuta_HIv2___RNAseq.g20587.t2 XP_022808163.1
## 8 Pocillopora_acuta_HIv2___TS.g12304.t1 ACE95141.1
## 9 Pocillopora_acuta_HIv2___TS.g12304.t1 EU532164.1
## 10 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29033.t1
## 11 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29034.t1
## 12 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 XP_022804785.1
## 13 Pocillopora_acuta_HIv2___RNAseq.g7908.t1 XP_022806664.1
## 14 Pocillopora_acuta_HIv2___RNAseq.g13824.t1 Gene:g27814
## 15 Pocillopora_acuta_HIv2___RNAseq.g16715.t1 P28_g11651
## definition
## 1 mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 2 Annotated: Vitellogenin
## 3 clone g15888 vitellogenin-like protein gene
## 4 clone g1441 vitellogenin-like protein gene
## 5 vitellogenin-like [Stylophora pistillata]
## 6 uncharacterized protein LOC111323869 [Stylophora pistillata]
## 7 uncharacterized protein LOC111345150 [Stylophora pistillata]
## 8 carbonic anhydrase [Stylophora pistillata]
## 9 carbonic anhydrase 2
## 10 Annotated: Carbonic Anhydrase (STPCA2-1)
## 11 Annotated: CarbonicAnhyrase
## 12 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 13 protein lingerer-like [Stylophora pistillata]
## 14 Annotated: carbonic anhydrase (STPCA2-2)
## 15 Late embryogenesis protein
## Ref
## 1 Peled et al., 2020
## 2 Mummadisetti et al., 2021
## 3 Drake et al., 2013
## 4 Drake et al., 2013
## 5 Peled et al., 2020
## 6 Peled et al., 2020
## 7 Peled et al., 2020
## 8 Moya et al., 2008
## 9 Bertucci et al., 2011
## 10 Mummadisetti et al., 2021
## 11 Mummadisetti et al., 2021
## 12 Peled et al., 2020
## 13 Peled et al., 2020
## 14 Mummadisetti et al., 2021
## 15 Drake et al., 2013
15/172 of the Biomineralization Genes are represented in the Origin DEGS, and these are 9 Pocillopora genes (some of the 9 have matches to multiple Biomineralization Genes) out of the 64 that are matching to Biomineralization Genes (9/64)
Pocillopora_acuta_HIv2___TS.g13222.t1b is a best match for: - Gene:g15294.t1 Annotated: Vitellogenin - P24_g15888 clone g15888 vitellogenin-like protein gene - P26_g1441 clone g1441 vitellogenin-like protein gene - XP_022779720.1 vitellogenin-like [Stylophora pistillata]
Pocillopora_acuta_HIv2___TS.g12304.t1 is a best match for: - ACE95141.1 carbonic anhydrase [Stylophora pistillata] - EU532164.1 carbonic anhydrase 2 - Gene:g29033.t1 Annotated: Carbonic Anhydrase (STPCA2-1) - Gene:g29034.t1 Annotated: CarbonicAnhyrase
for (i in Biomin_Origin_DEG_names) {print(Factor_ggmodelPlot(results,
geneName = i,
x1var = "Treatment",
x2var="Origin", addBox = T,
xlab = "Treatment and Origin",
title = i,
colours = plotColours,
lineColours = plotColours,
modelColours = modColours,
modelSize = 3))}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Biomin_Treatment_DEGs <- Treatment_DEGs %>%
inner_join(merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))
Biomin_Treatment_DEGs$definition
## character(0)
0/172 of the Biomineralization Genes are represented in the Treatment DEGS
Biomin_Interaction_DEGs <- Interaction_DEGs %>%
inner_join(merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))
Biomin_Interaction_DEGs$definition
## character(0)
0/172 of the Biomineralization Genes are represented in the Interaction DEGS
Frontloaded genes!
FRONTs <- read.csv(file="../../../output/glmmseq/frontloaded_genes.csv", sep=',', header=TRUE) %>% dplyr::select(!c('X'))
Biomin_FRONTs <- FRONTs %>%
inner_join(merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))
length(Biomin_FRONTs$definition) #how many biomineralization genes are represented
## [1] 56
Biomin_FRONTs_names <- unique(Biomin_FRONTs$Gene)
length(Biomin_FRONTs_names) #how many unique P. acuta genes is this? some match to more than one biomineralization gene
## [1] 23
Biomin_FRONTs <- Biomin_FRONTs %>% select(`accessionnumber/geneID`, definition, Ref, Gene)
#Output list of frontloaded genes with Biomineralization gene info
write.csv(Biomin_FRONTs, "../../../output/Biomin_frontloaded.csv", row.names = F)
56/172 of the Biomineralization Genes are represented in the Frontloaded genes
This is 23 genes, some of which are mapping to multiple Biomineralization genes, out of the 56 that are matching to Biomineralization Genes (23/56)
for (i in Biomin_FRONTs_names) {print(Factor_ggmodelPlot(results,
geneName = i,
x1var = "Treatment",
x2var="Origin", addBox = T,
xlab = "Treatment and Origin",
title = i,
colours = plotColours,
lineColours = plotColours,
modelColours = modColours,
modelSize = 3))}
READY <- read.csv(file="../../../output/glmmseq/frontloaded_genes_plotting.csv", sep=',', header=TRUE) %>% dplyr::select(!c('X'))
READY$color <- rep('gray', nrow(READY))
#These are "frontloaded, need a different color:
READY$color[READY$yall > 1 & READY$xall_1 < 1] <- 'gray40' #frontloaded genes
READY$color[READY$Gene %in% merged_data$Pocillopora_acuta_best_hit] <- '#4D004B' #biomin genes
READY$color[READY$yall > 1 & READY$xall_1 < 1 & READY$Gene %in% merged_data$Pocillopora_acuta_best_hit] <- '#df74ff' #frontloaded biomin genes
READY_cutoff <- READY %>% dplyr::filter(yall < 6) %>% dplyr::filter(xall_1 < 6)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.3.3
P <- READY_cutoff %>%
ggplot(aes(x=xall_1, y=yall)) +
#geom_point(colour = READY_cutoff$color, alpha=0.8) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color != "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color != "#4D004B"), alpha = 0.7) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#4D004B"), alpha = 1) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#df74ff"), alpha = 1) +
#geom_text(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), hjust=0, vjust=0)+
geom_text_repel(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), aes(label = Gene),
box.padding = 0.0,
point.padding = 0.0,
segment.color = 'grey50', size=3, max.overlaps = 18) +
theme_classic() +
stat_smooth(method = "lm", formula = y ~ x + poly(x, 2) - 1) +
geom_vline(xintercept=1, linetype="dotted") +
geom_hline(yintercept=1, linetype="dotted") +
labs(y= "Flat to Slope (Conditioned to naive) control ratio",
x = "Flat to Slope (Conditioned to naive) foldchange ratio",
title = "Frontloaded genes") +
scale_x_continuous(limits = c(0,6.1),expand = c(0, 0)) + scale_y_continuous(limits = c(0,6.1), expand = c(0, 0)) +
annotate("rect", xmin = 0, xmax = 1, ymin = 1, ymax = 6.1, alpha = .2) +
annotate("rect", xmin = 0, xmax = 1, ymin = 0, ymax = 1,alpha = .5)
P
ggsave("../../../output/glmmseq/frontloaded_figures/Biomin_frontloaded.pdf", plot =P)
## Saving 7 x 5 in image
Biomin_front_labels <- Biomin_FRONTs %>% select(Gene, definition) %>% group_by(Gene) %>%
summarise(
def = paste(unique(definition), collapse = ";")
)
head(Biomin_front_labels)
## # A tibble: 6 × 2
## Gene def
## <chr> <chr>
## 1 Pocillopora_acuta_HIv2___RNAseq.g13824.t1 Annotated: carbonic anhydrase (STPC…
## 2 Pocillopora_acuta_HIv2___RNAseq.g14653.t1 Major yolk protein;major yolk prote…
## 3 Pocillopora_acuta_HIv2___RNAseq.g15280.t1 solute carrier family 4 member gamm…
## 4 Pocillopora_acuta_HIv2___RNAseq.g16280.t1 CARP1 [Stylophora pistillata];Annot…
## 5 Pocillopora_acuta_HIv2___RNAseq.g16433.t1 polycystic kidney disease 1-related…
## 6 Pocillopora_acuta_HIv2___RNAseq.g16715.t1 Late embryogenesis protein
Biomin_front_labels <- Biomin_front_labels %>% left_join(READY, by = "Gene") %>% select(Gene, def, xall_1, yall) %>% arrange(-yall)
The highest value for yall of a biomineralization gene is 12.4.
We can’t label all 23 genes on the plot, so let’s choose all the ones with a yall > 1.5 (9 genes)
max(Biomin_front_labels$yall)
## [1] 12.35508
Biomin_front_labels_subset <- Biomin_front_labels %>% filter(yall>1.5)
nrow(Biomin_front_labels_subset)
## [1] 9
names_biomin <- Biomin_front_labels_subset %>% select(def) %>% as.vector()
names_biomin_modified <- c("Carbonic Anhydrase (STPCA2-2)",
"digestive cysteine proteinase 1-like",
"Late embryogenesis protein",
"von Willebrand factor D and EGF domain-containing protein",
"Hephaestin-like protein",
"Carbonic Anhydrase (STPCA2-1)",
"PKD1-related protein",
"USOMP-2",
"SLC4 gamma")
Biomin_front_labels_subset <- Biomin_front_labels_subset %>% mutate(names_biomin_modified= names_biomin_modified)
Plotting all points < 15
READY_cutoff <- READY %>% dplyr::filter(yall < 15) %>% dplyr::filter(xall_1 < 6)
READY_cutoff <- READY_cutoff %>% left_join(select(Biomin_front_labels_subset, c(Gene, names_biomin_modified)), by = "Gene")
P <- READY_cutoff %>%
ggplot(aes(x=xall_1, y=yall)) +
#geom_point(colour = READY_cutoff$color, alpha=0.8) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color != "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color != "#4D004B"), alpha = 0.7) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#4D004B"), alpha = 1) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#df74ff"), alpha = 1) +
#geom_text(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), hjust=0, vjust=0)+
theme_classic() +
geom_vline(xintercept=1, linetype="dotted") +
geom_hline(yintercept=1, linetype="dotted") +
labs(y= "Flat to Slope (Conditioned to naive) control ratio",
x = "Flat to Slope (Conditioned to naive) foldchange ratio",
title = "Frontloaded genes") +
scale_x_continuous(limits = c(0,6.1),expand = c(0, 0)) + scale_y_continuous(limits = c(0,15.1), expand = c(0, 0)) +
annotate("rect", xmin = 0, xmax = 1, ymin = 1, ymax = 15.1, alpha = .2) +
annotate("rect", xmin = 0, xmax = 1, ymin = 0, ymax = 1,alpha = .5) +
geom_label_repel(data = subset(READY_cutoff, READY_cutoff$Gene %in% Biomin_front_labels_subset$Gene), aes(label = Gene),
segment.color = 'black', size=3, nudge_y = 0.5, nudge_x = 1.5, force = 25)
P
ggsave("../../../output/glmmseq/frontloaded_figures/Biomin_frontloaded_labelled.pdf", plot =P, width = 7, height = 4)
P <- READY_cutoff %>%
ggplot(aes(x=xall_1, y=yall)) +
#geom_point(colour = READY_cutoff$color, alpha=0.8) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color != "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color != "#4D004B"), alpha = 0.7) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#4D004B"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#4D004B"), alpha = 1) +
geom_point(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), colour = subset(READY_cutoff$color, READY_cutoff$color == "#df74ff"), alpha = 1) +
#geom_text(data = subset(READY_cutoff, READY_cutoff$color == "#df74ff"), hjust=0, vjust=0)+
theme_classic() +
geom_vline(xintercept=1, linetype="dotted") +
geom_hline(yintercept=1, linetype="dotted") +
labs(y= "Flat to Slope (Conditioned to naive) control ratio",
x = "Flat to Slope (Conditioned to naive) foldchange ratio",
title = "Frontloaded genes") +
scale_x_continuous(limits = c(0,6.1),expand = c(0, 0)) + scale_y_continuous(limits = c(0,15.1), expand = c(0, 0)) +
annotate("rect", xmin = 0, xmax = 1, ymin = 1, ymax = 15.1, alpha = .2) +
annotate("rect", xmin = 0, xmax = 1, ymin = 0, ymax = 1,alpha = .5) +
geom_label_repel(data = subset(READY_cutoff, READY_cutoff$Gene %in% Biomin_front_labels_subset$Gene), aes(label = names_biomin_modified),
segment.color = 'black', size=3, nudge_y = 0.5, nudge_x = 1.5, force = 25)
P
ggsave("../../../output/glmmseq/frontloaded_figures/Biomin_frontloaded_labelled_definitions.pdf", plot =P, width = 7, height = 4)
VST reaction norm for just frontloaded biomineralization genes
#load in vst normalized gene expression
vst <- read.csv(file="../../../output/glmmseq/vst.csv", sep=',', header=TRUE) %>% dplyr::select(!c('X'))
Lets also pull in our metadata
coldata <- read.csv("../../../../TagSeq_Submission/RNA Submission Sample List metadata.csv") #read in metadata file
coldata <- plyr::rename(coldata, c("Sample.Name"="Coral_ID")) #Make a column that represents the colonies
coldata$Colony <- gsub("A", "", coldata$Coral_ID) #ID which sample is from which colony by removing letters after colony name
coldata$Colony <- gsub("B", "", coldata$Colony) #ID which sample is from which colony by removing letters after colony name
coldata$Colony <- gsub("C", "", coldata$Colony) #ID which sample is from which colony by removing letters after colony name
coldata$Colony <- gsub("D", "", coldata$Colony) #ID which sample is from which colony by removing letters after colony name
coldata$Origin <- factor(coldata$Origin) #set variables to factors
coldata$Colony <- factor(coldata$Colony) #set variables to factors
coldata$Treatment <- factor(coldata$Treatment) #set variables to factors
coldata <- coldata %>% filter(Coral_ID != c("RF16A","RF16C")) #removed outlier rows from metadata
head(coldata)
## Coral_ID Origin Treatment Colony
## 1 RF13B Flat Variable RF13
## 2 RF13D Flat Stable RF13
## 3 RF14B Flat Variable RF14
## 4 RF14C Flat Stable RF14
## 5 RF15B Flat Variable RF15
## 6 RF15D Flat Stable RF15
# reaction norm data config and plotting
vst_front <- vst[which(vst$Gene %in% Biomin_FRONTs_names),]
vst_front <- vst_front %>% reshape2::melt(id.var = 'Gene') %>%
dplyr::rename(Coral_ID = variable)
vst_front_Merge <- merge(vst_front, coldata, by = 'Coral_ID') %>%
mutate(group = paste(Treatment, Origin, sep = "_"))
vst_summary <- vst_front_Merge %>%
group_by(Gene, Treatment, Origin, group) %>%
summarise(mean_val = mean(value),
sd_val = sd(value),
se_val = sd_val/sqrt(n())) %>%
ungroup()
## `summarise()` has grouped output by 'Gene', 'Treatment', 'Origin'. You can
## override using the `.groups` argument.
group_summary <- vst_summary %>%
group_by(Treatment, Origin) %>%
summarise(meanExp = mean(mean_val),
sdExp = sd(mean_val),
n = n(),
se = sdExp/sqrt(n))
## `summarise()` has grouped output by 'Treatment'. You can override using the
## `.groups` argument.
# anova
ANOVA_moderate <- aov(lm(mean_val~as.character(group), data = vst_summary))
summary(ANOVA_moderate) # as.character(group) 3 1.66 0.554 0.167 0.918
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(group) 3 1.66 0.554 0.167 0.918
## Residuals 88 292.07 3.319
TukeyHSD(ANOVA_moderate)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = lm(mean_val ~ as.character(group), data = vst_summary))
##
## $`as.character(group)`
## diff lwr upr p adj
## Stable_Slope-Stable_Flat -0.34210947 -1.749000 1.064781 0.9198326
## Variable_Flat-Stable_Flat -0.05514339 -1.462034 1.351747 0.9996099
## Variable_Slope-Stable_Flat -0.21219166 -1.619082 1.194699 0.9789810
## Variable_Flat-Stable_Slope 0.28696608 -1.119924 1.693856 0.9505021
## Variable_Slope-Stable_Slope 0.12991781 -1.276973 1.536808 0.9949917
## Variable_Slope-Variable_Flat -0.15704827 -1.563939 1.249842 0.9912446
# diff lwr upr p adj
# Stable_Slope-Stable_Flat -0.34210947 -1.749000 1.064781 0.9198326
# Variable_Flat-Stable_Flat -0.05514339 -1.462034 1.351747 0.9996099
# Variable_Slope-Stable_Flat -0.21219166 -1.619082 1.194699 0.9789810
# Variable_Flat-Stable_Slope 0.28696608 -1.119924 1.693856 0.9505021
# Variable_Slope-Stable_Slope 0.12991781 -1.276973 1.536808 0.9949917
# Variable_Slope-Variable_Flat -0.15704827 -1.563939 1.249842 0.9912446
#Chi squre test
library(lsmeans)
## Loading required package: emmeans
## Warning: package 'emmeans' was built under R version 4.3.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
## The 'lsmeans' package is now basically a front end for 'emmeans'.
## Users are encouraged to switch the rest of the way.
## See help('transition') for more information, including how to
## convert old 'lsmeans' objects and scripts to work with 'emmeans'.
model <- glm(mean_val ~ group, data = vst_summary)
lsmeans_results <- lsmeans(model, pairwise ~ group)
posthoc_results <- pairs(lsmeans_results)
posthoc_results
## contrast estimate SE df t.ratio p.value
## Stable_Flat - Stable_Slope 0.3421 0.537 88 0.637 0.9198
## Stable_Flat - Variable_Flat 0.0551 0.537 88 0.103 0.9996
## Stable_Flat - Variable_Slope 0.2122 0.537 88 0.395 0.9790
## Stable_Slope - Variable_Flat -0.2870 0.537 88 -0.534 0.9505
## Stable_Slope - Variable_Slope -0.1299 0.537 88 -0.242 0.9950
## Variable_Flat - Variable_Slope 0.1570 0.537 88 0.292 0.9912
##
## P value adjustment: tukey method for comparing a family of 4 estimates
# contrast estimate SE df t.ratio p.value
# Stable_Flat - Stable_Slope 0.3421 0.537 88 0.637 0.9198
# Stable_Flat - Variable_Flat 0.0551 0.537 88 0.103 0.9996
# Stable_Flat - Variable_Slope 0.2122 0.537 88 0.395 0.9790
# Stable_Slope - Variable_Flat -0.2870 0.537 88 -0.534 0.9505
# Stable_Slope - Variable_Slope -0.1299 0.537 88 -0.242 0.9950
# Variable_Flat - Variable_Slope 0.1570 0.537 88 0.292 0.9912
#######
ggplot(group_summary, aes(x = Treatment, y = meanExp, color = Origin)) +
geom_errorbar(aes(ymin = meanExp - se, ymax = meanExp + se, color = Origin), width = 0.05) +
geom_line(aes(group = Origin), color = "darkgrey") +
geom_point(size = 3) +
labs(x = "Treatment", y = "VST Mean Gene Expression (mean +- SE)") +
theme_minimal()
P2 <- ggplot(group_summary, aes(x = Treatment, y = meanExp, fill = Origin, group = Origin)) +
geom_line(aes(linetype = Origin), size = 0.5) +
geom_pointrange(aes(ymin=meanExp-se, ymax=meanExp+se, shape=Origin, fill=Origin)) +
scale_shape_manual(values=c(1, 16))+
scale_fill_manual(values=c('black','white')) +
theme_classic() +
scale_linetype_manual(values = c("Slope" = "solid", "Flat" = "dashed")) +
labs(y= "mean SE VST expression",
x = "Treatment",
title = "Mean VST Expression of Frontloaded Biomineralization-Related Genes") +
theme(text = element_text(size=12)) +
#scale_y_continuous(limits = c(7.28,7.48), breaks = seq(7.2,7.5, by = .1)) +
#annotate(geom="text", x=0.94, y=7.31, label="b", color="black",size=4) +
#annotate(geom="text", x=0.94, y=7.46, label="a", color="black",size=4) +
#annotate(geom="text", x=2.06, y=7.39, label="c", color="black",size=4) +
#annotate(geom="text", x=2.09, y=7.42, label="a,c", color="black",size=4) +
annotate(geom="text", x = 2.3, y = 7.9, label=paste("N = ",group_summary$n[1]), size = 5) + guides(fill = FALSE, shape = FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggsave("../../../output/glmmseq/frontloaded_figures/VST_Biomin_frontloaded.pdf", plot =P2, width= 7, height = 5)
# Normality test
shapiro.test(vst_summary$mean_val[vst_summary$Treatment == "Stable" & vst_summary$Origin == "Slope"])
##
## Shapiro-Wilk normality test
##
## data: vst_summary$mean_val[vst_summary$Treatment == "Stable" & vst_summary$Origin == "Slope"]
## W = 0.86023, p-value = 0.004194
shapiro.test(vst_summary$mean_val[vst_summary$Treatment == "Variable" & vst_summary$Origin == "Slope"])
##
## Shapiro-Wilk normality test
##
## data: vst_summary$mean_val[vst_summary$Treatment == "Variable" & vst_summary$Origin == "Slope"]
## W = 0.85356, p-value = 0.003159
shapiro.test(vst_summary$mean_val[vst_summary$Treatment == "Stable" & vst_summary$Origin == "Flat"])
##
## Shapiro-Wilk normality test
##
## data: vst_summary$mean_val[vst_summary$Treatment == "Stable" & vst_summary$Origin == "Flat"]
## W = 0.8475, p-value = 0.002452
shapiro.test(vst_summary$mean_val[vst_summary$Treatment == "Variable" & vst_summary$Origin == "Flat"])
##
## Shapiro-Wilk normality test
##
## data: vst_summary$mean_val[vst_summary$Treatment == "Variable" & vst_summary$Origin == "Flat"]
## W = 0.84126, p-value = 0.001896
# Data is not normally distributed
# Perform Kruskal-Wallis test
kruskal_test <- kruskal.test(mean_val ~ group, data = vst_summary)
# Check the Kruskal-Wallis test results
print(kruskal_test)
##
## Kruskal-Wallis rank sum test
##
## data: mean_val by group
## Kruskal-Wallis chi-squared = 1.33, df = 3, p-value = 0.722
library(dunn.test)
# Conduct Dunn's post-hoc test
posthoc_dunn <- dunn.test(vst_summary$mean_val, g = vst_summary$group, method = "bonferroni")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 1.33, df = 3, p-value = 0.72
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | Stable_F Stable_S Variable
## ---------+---------------------------------
## Stable_S | 1.016001
## | 0.9289
## |
## Variable | 0.187739 -0.828261
## | 1.0000 1.0000
## |
## Variable | 0.728870 -0.287130 0.541130
## | 1.0000 1.0000 1.0000
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# Check the post-hoc test results
print(posthoc_dunn)
## $chi2
## [1] 1.33002
##
## $Z
## [1] 1.0160010 0.1877393 -0.8282617 0.7288703 -0.2871307 0.5411310
##
## $P
## [1] 0.1548145 0.4255405 0.2037612 0.2330405 0.3870061 0.2942087
##
## $P.adjusted
## [1] 0.9288867 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
##
## $comparisons
## [1] "Stable_Flat - Stable_Slope" "Stable_Flat - Variable_Flat"
## [3] "Stable_Slope - Variable_Flat" "Stable_Flat - Variable_Slope"
## [5] "Stable_Slope - Variable_Slope" "Variable_Flat - Variable_Slope"